
# This file creates the main data frames for the paper --------------------

# install.packages("foreign")
# install.packages("dplyr")
# install.packages("date")

library(foreign)
library(dplyr)
library(date)

# Read the Stata extract; the working directory must already contain the file
raw_data <- read.dta(file = "stata_data.dta")

# Keep a plain-numeric copy of FOED_DAG in `date`. Later steps compare `date`
# with birth-date cutoffs, so it is a day count for the birth date, not an age.
raw_data$date <- as.numeric(raw_data$FOED_DAG)

# Assign a birth order to each child of one parent.
#
# `date`: numeric birth dates (in days) of one parent's children. The callers
#   sort by date before calling; the function itself does not sort or check.
#
# Each child is numbered by its position in `date`, unless it is born fewer
# than 8 days after the previous child, in which case it inherits that child's
# number (the two are treated as the same birth). Numbers are position-based:
# after a pair numbered 1, 1 the next child is numbered 3.
#
# Returns an integer vector with the same length as `date` (length 0 for
# empty input). A missing date raises an error in the comparison.
birth_order_func <-
  function(date) {
    birth_order <- rep(NA_integer_, length(date))
    # seq_along() rather than 1:length(): the latter runs over c(1, 0) when
    # `date` is empty and returns a spurious one-element result.
    for (i in seq_along(date)) {
      birth_order[i] <- i
      if (i > 1 && (date[i] - date[i - 1]) < 8) {
        birth_order[i] <- birth_order[i - 1]
      }
    }
    birth_order
  }

# Children with a known mother (non-empty mor_id).
#   birth_order: computed within mother from the date-sorted rows
#   twin:        number of rows sharing the same mother and birth_order
#                (1 = singleton, 2 or more = multiple birth)
# The result stays grouped by (mor_id, birth_order).
twin_mom <-
  raw_data %>%
  filter(mor_id != "") %>%
  group_by(mor_id) %>%
  arrange(date) %>%
  mutate(birth_order = birth_order_func(date)) %>%
  group_by(mor_id, birth_order) %>%
  mutate(twin = n())

# The father-side child data combines two sets of rows:
#   1. rows already in twin_mom (non-empty mor_id) that also have a non-empty
#      far_id; they keep the birth_order / twin values computed within mother
#   2. rows with an empty mor_id but a non-empty far_id; for these,
#      birth_order and twin are computed within father instead

# Set 2: birth order and twin count within father
twin_dad_2 <-
  raw_data %>%
  filter(mor_id == "", far_id != "") %>%
  group_by(far_id) %>%
  arrange(date) %>%
  mutate(birth_order = birth_order_func(date)) %>%
  group_by(far_id, birth_order) %>%
  mutate(twin = n())

# Set 1 stacked on top of set 2
twin_dad <-
  rbind(filter(twin_mom, far_id != ""), twin_dad_2)

# Flag children who are part of a multiple birth (twin > 1) at birth order 1
# (twin_first) or birth order 2 (twin_second). Converting to a plain
# data.frame first drops the grouping left over from the previous steps.

twin_mom <-
  twin_mom %>%
  as.data.frame() %>%
  mutate(twin_first  = birth_order == 1 & twin > 1,
         twin_second = birth_order == 2 & twin > 1)

twin_dad <-
  twin_dad %>%
  as.data.frame() %>%
  mutate(twin_first  = birth_order == 1 & twin > 1,
         twin_second = birth_order == 2 & twin > 1)

# All constants are day counts from 1970-01-01 (R's Date origin), written as
# ISO dates so the values can be read directly.

# Birth-date cutoffs: Jan 1, 1994 (= 8766) and Jan 1, 1998 (= 10227)
date09 <- as.numeric(as.Date("1994-01-01"))
date13 <- as.numeric(as.Date("1998-01-01"))

# Election-day cutoffs (= 14565, 16028 and 16216)
date09_election <- as.numeric(as.Date("2009-11-17"))
date13_election <- as.numeric(as.Date("2013-11-19"))
# NOTE(review): the original literal 16216 corresponds to 2014-05-26 -- confirm
# that this, and not the day before, is the intended cutoff for the 2014 election.
date14_election <- as.numeric(as.Date("2014-05-26"))
  
  
# Filter out children born before Jan 1, 1994
# Create dummies for the first born being born after each election's cutoff date (used to condition on later)
# Recode mother id to pnr for merging back onto turnout
# Group by mother id

# One row per child with a known mother: the mother's id (stored as pnr) next
# to the child's own kom/bop values for 2009 and 2013
child <-
  twin_mom %>%
  transmute(pnr           = mor_id,
            kom2009_child = kom2009,
            bop2009_child = bop2009,
            kom2013_child = kom2013,
            bop2013_child = bop2013)

# For every person in twin_mom, count the children whose kom and bop both
# equal the person's own in 2009 / 2013. sum() is called without na.rm, so a
# person with no matching rows in `child` ends up with NA.
twin_mom_co <-
  twin_mom %>%
  left_join(child, by = "pnr") %>%
  filter(!is.na(pnr)) %>%
  group_by(pnr) %>%
  summarise(
    cohabit09 = sum(kom2009 == kom2009_child & bop2009 == bop2009_child),
    cohabit13 = sum(kom2013 == kom2013_child & bop2013 == bop2013_child)
  )

# Per-mother summary over children born on or after date09:
#   twin_first      number of children flagged twin_first
#   first09/first13 number of birth-order-1 children born on or after
#                   date09 / date13
#   no_children_*   number of children born before the respective election day
#   age_oldest      smallest `date` among these children (the earliest birth
#                   date, not an age)
# The mother id is renamed to pnr so the result can be joined onto the mothers.
twin_mom_agg <-
  twin_mom %>%
  filter(date >= date09) %>%
  group_by(pnr = mor_id) %>%
  summarise(twin_first     = sum(twin_first),
            first09        = sum(birth_order == 1 & date >= date09),
            first13        = sum(birth_order == 1 & date >= date13),
            no_children_09 = sum(date < date09_election),
            no_children_13 = sum(date < date13_election),
            no_children_14 = sum(date < date14_election),
            age_oldest     = min(date))

# Women's own id, turnout variables and `date`, with the per-mother child
# summaries and cohabitation counts attached by pnr.
# NOTE(review): the rows come from twin_mom, which only holds persons whose own
# mor_id is non-empty -- confirm that dropping women with a missing mother id
# is intended.
mothers <-
  twin_mom %>%
  filter(female == 1) %>%
  select(pnr, stemte_2009, stemt, ep_stemt, date) %>%
  left_join(twin_mom_agg, by = "pnr") %>%
  left_join(twin_mom_co, by = "pnr")

# Repeat for fathers ----------------------------------------

# One row per child in twin_dad: the father's id (stored as pnr) next to the
# child's own kom/bop values for 2009 and 2013
child <-
  twin_dad %>%
  transmute(pnr           = far_id,
            kom2009_child = kom2009,
            bop2009_child = bop2009,
            kom2013_child = kom2013,
            bop2013_child = bop2013)

# For every person in twin_dad, count the children whose kom and bop both
# equal the person's own in 2009 / 2013. sum() is called without na.rm, so a
# person with no matching rows in `child` ends up with NA.
twin_dad_co <-
  twin_dad %>%
  left_join(child, by = "pnr") %>%
  filter(!is.na(pnr)) %>%
  group_by(pnr) %>%
  summarise(
    cohabit09 = sum(kom2009 == kom2009_child & bop2009 == bop2009_child),
    cohabit13 = sum(kom2013 == kom2013_child & bop2013 == bop2013_child)
  )

# Per-father summary over children born on or after date09:
#   twin_first      number of children flagged twin_first
#   first09/first13 number of birth-order-1 children born on or after
#                   date09 / date13
#   no_children_*   number of children born before the respective election day
#   age_oldest      smallest `date` among these children (the earliest birth
#                   date, not an age)
# The father id is renamed to pnr so the result can be joined onto the fathers.
twin_dad_agg <-
  as.data.frame(twin_dad) %>%
  # `>=` rather than `>`: keeps children born on the cutoff day itself, in
  # line with the mother-side filter and with the first09 test below.
  filter(date >= date09) %>%
  mutate(first09 = birth_order == 1 & date >= date09,
         first13 = birth_order == 1 & date >= date13,
         pnr     = far_id) %>%
  group_by(pnr) %>%
  summarise(twin_first     = sum(twin_first),
            first09        = sum(first09),
            first13        = sum(first13),
            no_children_09 = sum(date < date09_election),
            no_children_13 = sum(date < date13_election),
            no_children_14 = sum(date < date14_election),
            age_oldest     = min(date))

# Men's own id, turnout variables and `date`, with the per-father child
# summaries and cohabitation counts attached by pnr.
# NOTE(review): the rows come from twin_dad, which only holds persons whose own
# far_id is non-empty -- confirm that this restriction is intended.
fathers <-
  twin_dad %>%
  filter(female == 0) %>%
  select(pnr, stemte_2009, stemt, ep_stemt, date) %>%
  left_join(twin_dad_agg, by = "pnr") %>%
  left_join(twin_dad_co, by = "pnr")

# Write each final data frame to its own .rdata file in the working directory
save(list = "mothers", file = "mothers.rdata")
save(list = "fathers", file = "fathers.rdata")
